import requests

from spider_utils.csv_utils import img_data_to_csv
from spider_utils.img_utils import get_img_mes
from bs4 import BeautifulSoup

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/89.0.4389.114 Safari/537.36'
}

session = requests.Session()

# Page range of the source site: 1-375
min_page = 1
max_page = 375
baseURL = 'https://tupian.gq/'

# Source site recorded with every image entry
src_site = 'https://tupian.gq'

# Collect the URL and related metadata for every image
img_data_list = []

# Simple progress counter printed for each page fetched
i = 1

# for page in range(min_page, max_page + 1):  # full crawl over all pages
for page in range(1, 2):  # only the first page while testing
    print(i)
    i = i + 1

    r = session.get(baseURL + str(page) + '.html', headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    try:
        # Each .post-item element holds one image; the page's keywords are reused as its tags
        divs = soup.select('.post-item')
        tags = soup.select('meta[name="keywords"]')[0]['content']
        for div in divs:
            img_data = {}
            img_data['src_site'] = src_site
            img_data['tags'] = tags
            img_data['src'] = div['data-src']
            img_data['outer_net'] = 0
            # Fetch additional metadata (colors and size) for the image
            other_mes = get_img_mes(div['data-src'])
            img_data['colors'] = other_mes['colors']
            img_data['size'] = other_mes['size']
            img_data_list.append(img_data)
    except Exception as e:
        print(f'error on page {page}: {e}')
        continue

print(img_data_list)
# img_data_to_csv(img_data_list, img_data_list[0].keys(), 'data/tupian_data.csv')
